Notes:
Import libraries
import os
import fa2
import sys
import loompy
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
from matplotlib import rcParams
import scanpy as sc
sc.logging.print_versions()
Set parameters
# Change directory
os.getcwd()
os.chdir('/Users/rmevel/jupyter/scanpy_mevel_et_al/sc_explant_dataset/Explant_final/')
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(4,4))
results_file = './write/Mevel_et_al_sc_explant.h5ad'
We use the loom file generated using as.loom from Seurat. Use scVelo to import seurat object as read_loom is failing in scanpy.
Important: Make sure the file is not used somewhere else (R or other notebook) as this will cause it to fail opening.
# scanpy read function
adata = sc.read_loom("/Users/rmevel/r/sc_mevel_et_al/sc_explant_dataset/loom/mevel_et_al_sc_explant_sce_final.loom")
# Print content
adata
Let us work with a higher precision than the default ‘float32’ to ensure exactly the same results across different computational platforms.
adata.X = adata.X.astype('float64') # this is not required and results will be comparable without it
Check presence of dimensionally reduced data, and plot umap
adata.obsm
Define sample colors
adata.uns['sample_colors'] = ["#DC2B19", "#F7A400", "#72ABF8", "#434CA7"]
adata.uns['in_silico_clusters_colors'] = ["#F09900", "#F1BA88", "#D63A69", "#9DDED6", "#8DD593", "#428654", "#B2B8E0", "#4A6FE3", "#1037AA"]
adata.uns
Plot previously computed embeddings
sc.pl.embedding(adata, basis='umap_cell_embeddings', color=['sample', 'in_silico_clusters'], title="UMAP")
sc.pl.embedding(adata, basis='DiffusionMap_cell_embeddings', color=['sample', 'in_silico_clusters'], title="Diffusion Map")
Log transform the data for plotting
sc.pp.log1p(adata)
adata
We use a csv to import the components because Seurat as.loom function only saved the 2 first DCs
We can double check number of dimensions in the imported from seurat diffusion map:
adata.obsm['DiffusionMap_cell_embeddings'].ndim
dm20 = pd.read_csv("/Users/rmevel/r/sc_mevel_et_al/sc_explant_dataset/r_save/diffmap20_final.csv", index_col=0)
dm10 = dm20[dm20.columns[0:10]]
dm5 = dm20[dm20.columns[0:5]]
adata.obsm['seu_diffmap20'] = dm20.values
adata.obsm['seu_diffmap10'] = dm10.values
adata.obsm['seu_diffmap5'] = dm5.values
adata.obsm
import copy
sdata = copy.deepcopy(adata)
### Use this if need to copy object back
adata = copy.deepcopy(sdata)
### Use this if need to copy object back
adata = copy.deepcopy(sdata)
Use the first 5 diffusion map components calculated before in Seurat to denoise the graph.
sc.pp.neighbors(adata, n_neighbors=20, use_rep='seu_diffmap10')
Compute PAGA using previously computed clusters
sc.tl.paga(adata, groups='in_silico_clusters')
sc.pl.paga(adata, color=['in_silico_clusters'])
# Make figures
rcParams['figure.figsize'] = 4,4
sc.pl.paga(adata, plot=True, color=['in_silico_clusters'],
node_size_scale=1.2, node_size_power=1.2, edge_width_scale=1,
fontsize=8, save=False, threshold=0.0)
pos1 = adata.uns['paga']['pos']
np.savetxt('pos1.csv', pos1, delimiter=',')
pos1 = np.loadtxt('pos1.csv', delimiter=',')
sc.tl.draw_graph(adata, init_pos='paga')
sc.pl.draw_graph(adata,
color=['in_silico_clusters', 'sample', 'Runx1', 'Nkx3-1', 'Meis2', 'Shh', 'Notch1', 'Top2a', 'Krt4', 'Psca', 'Krt14', 'Dcn'],
legend_loc='on data')
rcParams['figure.figsize'] = 5,5
sc.pl.paga_compare(
adata, threshold=0.0, title='', right_margin=0.2, size=10, edge_width_scale=1,
legend_fontsize=12, fontsize=12, frameon=False, edges=True, save=False, add_pos=True)
Stop here
adata.uns['paga']['pos']. # Check data is present
adata.uns['paga']
Follow script available here: https://lazappi.github.io/phd-thesis-analysis/05-paga.html
import os
import json
out_dir = 'output/paga_diffmap10'
if not os.path.exists(out_dir):
os.makedirs(out_dir)
def con2edges(con, names=None, sparse=True):
print('Converting connectivity matrix to edges...')
n = con.shape[0]
edges = pd.DataFrame(columns=['From', 'To', 'Connectivity'])
for i in range(n):
for j in range(i + 1, n):
if names is not None:
fr = names[i]
to = names[j]
else:
fr = str(i)
to = str(j)
connectivity = con[i, j]
if sparse and connectivity == 0:
continue
entry = {'From' : fr, 'To' : to,
'Connectivity' : con[i, j]}
edges = edges.append(entry, ignore_index=True)
return edges
print('Outputting cluster edges...')
clust_con = adata.uns['paga']['connectivities'].toarray()
clust_edges = con2edges(clust_con)
clust_edges.to_csv(os.path.join(out_dir, 'cluster_edges.csv'),
index=False)
print('Outputting cluster tree edges...')
clust_tree_con = adata.uns['paga']['connectivities_tree'].toarray()
clust_tree_edges = con2edges(clust_tree_con)
clust_tree_edges.to_csv(os.path.join(out_dir, 'cluster_tree_edges.csv'),
index=False)
print('Outputting cluster embedding...')
clust_embedding = pd.DataFrame(adata.uns['paga']['pos'], columns=['X', 'Y'])
clust_embedding['in_silico_clusters'] = range(clust_embedding.shape[0])
clust_embedding = clust_embedding[['in_silico_clusters', 'X', 'Y']]
clust_embedding.to_csv(os.path.join(out_dir, 'cluster_embedding.csv'),
index=False)
print('Outputting cell edges...')
cells = adata.obs['Cell']
cell_con = adata.uns['neighbors']['connectivities']
cell_edges = pd.DataFrame(columns=['From', 'To', 'Connectivity'])
n_rows = len(cell_con.indptr)
for i in range(len(cell_con.indptr) - 1):
row_ind = cell_con.indices[cell_con.indptr[i]:cell_con.indptr[i + 1]]
print(f'\r\tRow {i} of {n_rows}', end='')
for k, j in enumerate(row_ind):
if j > i:
con = cell_con.data[cell_con.indptr[i] + k]
fr = cells[i]
to = cells[j]
entry = {'From' : fr, 'To' : to, 'Connectivity' : con}
cell_edges = cell_edges.append(entry, ignore_index=True)
print('\n')
cell_edges.to_csv(os.path.join(out_dir, 'cell_edges.csv'),
index=False)
print('Outputting cell embedding...')
x = adata.obsm['X_draw_graph_fa'][:, 0]
y = adata.obsm['X_draw_graph_fa'][:, 1]
cell_embedding = pd.DataFrame({'Cell' : cells, 'X' : x, 'Y' : y})
cell_embedding.to_csv(os.path.join(out_dir, 'cell_embedding.csv'),
index=False)
print('Done!')
Define root and compute pseudotime
adata.uns['iroot'] = np.flatnonzero(adata.obs['in_silico_clusters'] == 'C0')[0]
sc.tl.dpt(adata)
sc.pl.draw_graph(adata, color=['dpt_pseudotime', 'in_silico_clusters', 'sample'])
out_dir = 'output/paga_diffmap10'
dpt = pd.DataFrame(adata.obs['dpt_pseudotime'])
dpt.to_csv(os.path.join(out_dir, 'dpt_pseudotime.csv'), index=True)